{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Target / mean encoding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "      <th>A3</th>\n",
       "      <th>A4</th>\n",
       "      <th>A5</th>\n",
       "      <th>A6</th>\n",
       "      <th>A7</th>\n",
       "      <th>A8</th>\n",
       "      <th>A9</th>\n",
       "      <th>A10</th>\n",
       "      <th>A11</th>\n",
       "      <th>A12</th>\n",
       "      <th>A13</th>\n",
       "      <th>A14</th>\n",
       "      <th>A15</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>b</td>\n",
       "      <td>30.83</td>\n",
       "      <td>0.000</td>\n",
       "      <td>u</td>\n",
       "      <td>g</td>\n",
       "      <td>w</td>\n",
       "      <td>v</td>\n",
       "      <td>1.25</td>\n",
       "      <td>t</td>\n",
       "      <td>t</td>\n",
       "      <td>1</td>\n",
       "      <td>f</td>\n",
       "      <td>g</td>\n",
       "      <td>202.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>a</td>\n",
       "      <td>58.67</td>\n",
       "      <td>4.460</td>\n",
       "      <td>u</td>\n",
       "      <td>g</td>\n",
       "      <td>q</td>\n",
       "      <td>h</td>\n",
       "      <td>3.04</td>\n",
       "      <td>t</td>\n",
       "      <td>t</td>\n",
       "      <td>6</td>\n",
       "      <td>f</td>\n",
       "      <td>g</td>\n",
       "      <td>43.0</td>\n",
       "      <td>560</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a</td>\n",
       "      <td>24.50</td>\n",
       "      <td>0.500</td>\n",
       "      <td>u</td>\n",
       "      <td>g</td>\n",
       "      <td>q</td>\n",
       "      <td>h</td>\n",
       "      <td>1.50</td>\n",
       "      <td>t</td>\n",
       "      <td>f</td>\n",
       "      <td>0</td>\n",
       "      <td>f</td>\n",
       "      <td>g</td>\n",
       "      <td>280.0</td>\n",
       "      <td>824</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>b</td>\n",
       "      <td>27.83</td>\n",
       "      <td>1.540</td>\n",
       "      <td>u</td>\n",
       "      <td>g</td>\n",
       "      <td>w</td>\n",
       "      <td>v</td>\n",
       "      <td>3.75</td>\n",
       "      <td>t</td>\n",
       "      <td>t</td>\n",
       "      <td>5</td>\n",
       "      <td>t</td>\n",
       "      <td>g</td>\n",
       "      <td>100.0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>b</td>\n",
       "      <td>20.17</td>\n",
       "      <td>5.625</td>\n",
       "      <td>u</td>\n",
       "      <td>g</td>\n",
       "      <td>w</td>\n",
       "      <td>v</td>\n",
       "      <td>1.71</td>\n",
       "      <td>t</td>\n",
       "      <td>f</td>\n",
       "      <td>0</td>\n",
       "      <td>f</td>\n",
       "      <td>s</td>\n",
       "      <td>120.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  A1     A2     A3 A4 A5 A6 A7    A8 A9 A10  A11 A12 A13    A14  A15  target\n",
       "0  b  30.83  0.000  u  g  w  v  1.25  t   t    1   f   g  202.0    0       1\n",
       "1  a  58.67  4.460  u  g  q  h  3.04  t   t    6   f   g   43.0  560       1\n",
       "2  a  24.50  0.500  u  g  q  h  1.50  t   f    0   f   g  280.0  824       1\n",
       "3  b  27.83  1.540  u  g  w  v  3.75  t   t    5   t   g  100.0    3       1\n",
       "4  b  20.17  5.625  u  g  w  v  1.71  t   f    0   f   s  120.0    0       1"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's load the data set\n",
    "\n",
    "data = pd.read_csv(\"credit_approval_uci.csv\")\n",
    "\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((483, 15), (207, 15))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Let's separate into training and testing sets\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    data.drop(labels=[\"target\"], axis=1),  # predictors\n",
    "    data[\"target\"],  # target\n",
    "    test_size=0.3,  # percentage of observations in test set\n",
    "    random_state=0,  # seed to ensure reproducibility\n",
    ")\n",
    "\n",
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Target mean encoding with pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Missing': 1.0,\n",
       " 'bb': 0.5128205128205128,\n",
       " 'dd': 0.4,\n",
       " 'ff': 0.14634146341463414,\n",
       " 'h': 0.6039603960396039,\n",
       " 'j': 0.2,\n",
       " 'n': 0.6666666666666666,\n",
       " 'o': 0.0,\n",
       " 'v': 0.4187725631768953,\n",
       " 'z': 0.7142857142857143}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's create a dictionary with the target mean value\n",
    "# per category for the variable A7\n",
    "\n",
    "mapping = y_train.groupby(X_train[\"A7\"]).mean().to_dict()\n",
    "\n",
    "mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# replace the labels with the target mean\n",
    "\n",
    "X_train_enc = X_train.copy()\n",
    "X_test_enc = X_test.copy()\n",
    "\n",
    "X_train_enc[\"A7\"] = X_train_enc[\"A7\"].map(mapping)\n",
    "X_test_enc[\"A7\"] = X_test_enc[\"A7\"].map(mapping)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Target encoding with Scikit-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import TargetEncoder\n",
    "from sklearn.compose import ColumnTransformer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A12', 'A13']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's select the categorical variables\n",
    "\n",
    "cat_vars = X_train.select_dtypes(include=\"O\").columns.to_list()\n",
    "\n",
    "cat_vars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "enc = TargetEncoder(smooth=\"auto\", random_state=9)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Indicate which variables to encode:\n",
    "\n",
    "ct = ColumnTransformer(\n",
    "    [(\"encoder\", enc, cat_vars)],\n",
    "    remainder=\"passthrough\",\n",
    ").set_output(transform=\"pandas\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's transform train and test set\n",
    "\n",
    "X_train_enc = ct.fit_transform(X_train, y_train)\n",
    "X_test_enc = ct.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([0.48977085, 0.47206282, 0.43883698]),\n",
       " array([1.        , 1.        , 0.51222159, 0.22745101]),\n",
       " array([1.        , 0.51222159, 1.        , 0.22745101]),\n",
       " array([1.        , 0.44140706, 0.45158801, 0.69310186, 0.19793494,\n",
       "        0.61167676, 0.10867498, 0.27172849, 0.267243  , 0.2928946 ,\n",
       "        0.4240346 , 0.62207703, 1.        , 0.49895426, 0.78241398]),\n",
       " array([1.        , 0.51121681, 0.4080061 , 0.15002659, 0.60249387,\n",
       "        0.22854703, 0.61657956, 0.        , 0.41888051, 0.68635092]),\n",
       " array([0.07092592, 0.78426413]),\n",
       " array([0.25147697, 0.70182461]),\n",
       " array([0.45245936, 0.44547181]),\n",
       " array([0.46481717, 1.        , 0.21467342])]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ct.named_transformers_[\"encoder\"].encodings_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>encoder__A1</th>\n",
       "      <th>encoder__A4</th>\n",
       "      <th>encoder__A5</th>\n",
       "      <th>encoder__A6</th>\n",
       "      <th>encoder__A7</th>\n",
       "      <th>encoder__A9</th>\n",
       "      <th>encoder__A10</th>\n",
       "      <th>encoder__A12</th>\n",
       "      <th>encoder__A13</th>\n",
       "      <th>remainder__A2</th>\n",
       "      <th>remainder__A3</th>\n",
       "      <th>remainder__A8</th>\n",
       "      <th>remainder__A11</th>\n",
       "      <th>remainder__A14</th>\n",
       "      <th>remainder__A15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>596</th>\n",
       "      <td>0.475949</td>\n",
       "      <td>0.517129</td>\n",
       "      <td>0.517129</td>\n",
       "      <td>0.432675</td>\n",
       "      <td>0.408258</td>\n",
       "      <td>0.812012</td>\n",
       "      <td>0.719660</td>\n",
       "      <td>0.454524</td>\n",
       "      <td>0.467189</td>\n",
       "      <td>46.08</td>\n",
       "      <td>3.000</td>\n",
       "      <td>2.375</td>\n",
       "      <td>8</td>\n",
       "      <td>396.0</td>\n",
       "      <td>4159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303</th>\n",
       "      <td>0.474353</td>\n",
       "      <td>0.513565</td>\n",
       "      <td>0.513565</td>\n",
       "      <td>0.655568</td>\n",
       "      <td>0.430705</td>\n",
       "      <td>0.073196</td>\n",
       "      <td>0.250681</td>\n",
       "      <td>0.449070</td>\n",
       "      <td>0.460417</td>\n",
       "      <td>15.92</td>\n",
       "      <td>2.875</td>\n",
       "      <td>0.085</td>\n",
       "      <td>0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>0.450182</td>\n",
       "      <td>0.235691</td>\n",
       "      <td>0.235691</td>\n",
       "      <td>0.486887</td>\n",
       "      <td>0.408253</td>\n",
       "      <td>0.759674</td>\n",
       "      <td>0.687389</td>\n",
       "      <td>0.458086</td>\n",
       "      <td>0.466809</td>\n",
       "      <td>36.33</td>\n",
       "      <td>2.125</td>\n",
       "      <td>0.085</td>\n",
       "      <td>1</td>\n",
       "      <td>50.0</td>\n",
       "      <td>1187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>351</th>\n",
       "      <td>0.431885</td>\n",
       "      <td>0.277459</td>\n",
       "      <td>0.277459</td>\n",
       "      <td>0.100749</td>\n",
       "      <td>0.129423</td>\n",
       "      <td>0.067201</td>\n",
       "      <td>0.249536</td>\n",
       "      <td>0.456697</td>\n",
       "      <td>0.466055</td>\n",
       "      <td>22.17</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118</th>\n",
       "      <td>0.450182</td>\n",
       "      <td>0.510204</td>\n",
       "      <td>0.510204</td>\n",
       "      <td>0.377833</td>\n",
       "      <td>0.408253</td>\n",
       "      <td>0.759674</td>\n",
       "      <td>0.687389</td>\n",
       "      <td>0.440268</td>\n",
       "      <td>0.466809</td>\n",
       "      <td>57.83</td>\n",
       "      <td>7.040</td>\n",
       "      <td>14.000</td>\n",
       "      <td>6</td>\n",
       "      <td>360.0</td>\n",
       "      <td>1332</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     encoder__A1  encoder__A4  encoder__A5  encoder__A6  encoder__A7  \\\n",
       "596     0.475949     0.517129     0.517129     0.432675     0.408258   \n",
       "303     0.474353     0.513565     0.513565     0.655568     0.430705   \n",
       "204     0.450182     0.235691     0.235691     0.486887     0.408253   \n",
       "351     0.431885     0.277459     0.277459     0.100749     0.129423   \n",
       "118     0.450182     0.510204     0.510204     0.377833     0.408253   \n",
       "\n",
       "     encoder__A9  encoder__A10  encoder__A12  encoder__A13  remainder__A2  \\\n",
       "596     0.812012      0.719660      0.454524      0.467189          46.08   \n",
       "303     0.073196      0.250681      0.449070      0.460417          15.92   \n",
       "204     0.759674      0.687389      0.458086      0.466809          36.33   \n",
       "351     0.067201      0.249536      0.456697      0.466055          22.17   \n",
       "118     0.759674      0.687389      0.440268      0.466809          57.83   \n",
       "\n",
       "     remainder__A3  remainder__A8  remainder__A11  remainder__A14  \\\n",
       "596          3.000          2.375               8           396.0   \n",
       "303          2.875          0.085               0           120.0   \n",
       "204          2.125          0.085               1            50.0   \n",
       "351          0.585          0.000               0           100.0   \n",
       "118          7.040         14.000               6           360.0   \n",
       "\n",
       "     remainder__A15  \n",
       "596            4159  \n",
       "303               0  \n",
       "204            1187  \n",
       "351               0  \n",
       "118            1332  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train_enc.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Target mean encoding with Feature-engine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "from feature_engine.encoding import MeanEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's create the encoder that automatically finds\n",
    "# the categorical variables\n",
    "\n",
    "mean_enc = MeanEncoder(smoothing=\"auto\", variables=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-1 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-1 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-1 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-1 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-1 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-1 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-1 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-1 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MeanEncoder(smoothing=&#x27;auto&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;MeanEncoder<span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>MeanEncoder(smoothing=&#x27;auto&#x27;)</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "MeanEncoder(smoothing='auto')"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# with fit, the transformer learns the numerical\n",
    "# representation for each category, that is, the\n",
    "# mean target value per category\n",
    "\n",
    "# when fitting the transformer, we need to pass the target as well\n",
    "# just like with any Scikit-learn predictor class\n",
    "\n",
    "mean_enc.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A12', 'A13']"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# variables that will be encoded\n",
    "\n",
    "mean_enc.variables_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A1': {'Missing': 0.4897708500995383,\n",
       "  'a': 0.47206282385504167,\n",
       "  'b': 0.4388369819348539},\n",
       " 'A4': {'Missing': 1.0,\n",
       "  'l': 1.0,\n",
       "  'u': 0.5122215930819429,\n",
       "  'y': 0.22745101171497045},\n",
       " 'A5': {'Missing': 1.0,\n",
       "  'g': 0.5122215930819429,\n",
       "  'gg': 1.0,\n",
       "  'p': 0.22745101171497045},\n",
       " 'A6': {'Missing': 1.0,\n",
       "  'aa': 0.4414070613807244,\n",
       "  'c': 0.45158801273269944,\n",
       "  'cc': 0.6931018626679666,\n",
       "  'd': 0.19793494395099673,\n",
       "  'e': 0.6116767599605843,\n",
       "  'ff': 0.10867497814869279,\n",
       "  'i': 0.2717284862549896,\n",
       "  'j': 0.26724299829766096,\n",
       "  'k': 0.2928945960903988,\n",
       "  'm': 0.42403460050172886,\n",
       "  'q': 0.622077031849362,\n",
       "  'r': 1.0,\n",
       "  'w': 0.4989542602527962,\n",
       "  'x': 0.7824139821749067},\n",
       " 'A7': {'Missing': 1.0,\n",
       "  'bb': 0.5112168095503893,\n",
       "  'dd': 0.4080060972835237,\n",
       "  'ff': 0.15002659488133335,\n",
       "  'h': 0.6024938698481956,\n",
       "  'j': 0.22854703116167752,\n",
       "  'n': 0.6165795586527293,\n",
       "  'o': 0.0,\n",
       "  'v': 0.41888050693409623,\n",
       "  'z': 0.6863509157249638},\n",
       " 'A9': {'f': 0.07092591902432215, 't': 0.7842641326398047},\n",
       " 'A10': {'f': 0.251476972417426, 't': 0.7018246050448415},\n",
       " 'A12': {'f': 0.4524593610653695, 't': 0.44547180624095933},\n",
       " 'A13': {'g': 0.4648171749763738, 'p': 1.0, 's': 0.2146734199776911}}"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# in the encoder_dict_ we find the target mean per category\n",
    "# per variable\n",
    "\n",
    "mean_enc.encoder_dict_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's transform train and test set\n",
    "\n",
    "X_train_enc = mean_enc.transform(X_train)\n",
    "X_test_enc = mean_enc.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "      <th>A3</th>\n",
       "      <th>A4</th>\n",
       "      <th>A5</th>\n",
       "      <th>A6</th>\n",
       "      <th>A7</th>\n",
       "      <th>A8</th>\n",
       "      <th>A9</th>\n",
       "      <th>A10</th>\n",
       "      <th>A11</th>\n",
       "      <th>A12</th>\n",
       "      <th>A13</th>\n",
       "      <th>A14</th>\n",
       "      <th>A15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>596</th>\n",
       "      <td>0.472063</td>\n",
       "      <td>46.08</td>\n",
       "      <td>3.000</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.451588</td>\n",
       "      <td>0.418881</td>\n",
       "      <td>2.375</td>\n",
       "      <td>0.784264</td>\n",
       "      <td>0.701825</td>\n",
       "      <td>8</td>\n",
       "      <td>0.445472</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>396.0</td>\n",
       "      <td>4159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303</th>\n",
       "      <td>0.472063</td>\n",
       "      <td>15.92</td>\n",
       "      <td>2.875</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.622077</td>\n",
       "      <td>0.418881</td>\n",
       "      <td>0.085</td>\n",
       "      <td>0.070926</td>\n",
       "      <td>0.251477</td>\n",
       "      <td>0</td>\n",
       "      <td>0.452459</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>120.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>0.438837</td>\n",
       "      <td>36.33</td>\n",
       "      <td>2.125</td>\n",
       "      <td>0.227451</td>\n",
       "      <td>0.227451</td>\n",
       "      <td>0.498954</td>\n",
       "      <td>0.418881</td>\n",
       "      <td>0.085</td>\n",
       "      <td>0.784264</td>\n",
       "      <td>0.701825</td>\n",
       "      <td>1</td>\n",
       "      <td>0.452459</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>50.0</td>\n",
       "      <td>1187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>351</th>\n",
       "      <td>0.438837</td>\n",
       "      <td>22.17</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.227451</td>\n",
       "      <td>0.227451</td>\n",
       "      <td>0.108675</td>\n",
       "      <td>0.150027</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.070926</td>\n",
       "      <td>0.251477</td>\n",
       "      <td>0</td>\n",
       "      <td>0.452459</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118</th>\n",
       "      <td>0.438837</td>\n",
       "      <td>57.83</td>\n",
       "      <td>7.040</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.424035</td>\n",
       "      <td>0.418881</td>\n",
       "      <td>14.000</td>\n",
       "      <td>0.784264</td>\n",
       "      <td>0.701825</td>\n",
       "      <td>6</td>\n",
       "      <td>0.445472</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>360.0</td>\n",
       "      <td>1332</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           A1     A2     A3        A4        A5        A6        A7      A8  \\\n",
       "596  0.472063  46.08  3.000  0.512222  0.512222  0.451588  0.418881   2.375   \n",
       "303  0.472063  15.92  2.875  0.512222  0.512222  0.622077  0.418881   0.085   \n",
       "204  0.438837  36.33  2.125  0.227451  0.227451  0.498954  0.418881   0.085   \n",
       "351  0.438837  22.17  0.585  0.227451  0.227451  0.108675  0.150027   0.000   \n",
       "118  0.438837  57.83  7.040  0.512222  0.512222  0.424035  0.418881  14.000   \n",
       "\n",
       "           A9       A10  A11       A12       A13    A14   A15  \n",
       "596  0.784264  0.701825    8  0.445472  0.464817  396.0  4159  \n",
       "303  0.070926  0.251477    0  0.452459  0.464817  120.0     0  \n",
       "204  0.784264  0.701825    1  0.452459  0.464817   50.0  1187  \n",
       "351  0.070926  0.251477    0  0.452459  0.464817  100.0     0  \n",
       "118  0.784264  0.701825    6  0.445472  0.464817  360.0  1332  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Let's inspect the transformed dataset\n",
    "\n",
    "X_train_enc.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "      <th>A3</th>\n",
       "      <th>A4</th>\n",
       "      <th>A5</th>\n",
       "      <th>A6</th>\n",
       "      <th>A7</th>\n",
       "      <th>A8</th>\n",
       "      <th>A9</th>\n",
       "      <th>A10</th>\n",
       "      <th>A11</th>\n",
       "      <th>A12</th>\n",
       "      <th>A13</th>\n",
       "      <th>A14</th>\n",
       "      <th>A15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>0.472063</td>\n",
       "      <td>45.83</td>\n",
       "      <td>10.50</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.622077</td>\n",
       "      <td>0.418881</td>\n",
       "      <td>5.000</td>\n",
       "      <td>0.784264</td>\n",
       "      <td>0.701825</td>\n",
       "      <td>7</td>\n",
       "      <td>0.445472</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>586</th>\n",
       "      <td>0.438837</td>\n",
       "      <td>64.08</td>\n",
       "      <td>20.00</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.782414</td>\n",
       "      <td>0.602494</td>\n",
       "      <td>17.500</td>\n",
       "      <td>0.784264</td>\n",
       "      <td>0.701825</td>\n",
       "      <td>9</td>\n",
       "      <td>0.445472</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140</th>\n",
       "      <td>0.472063</td>\n",
       "      <td>31.25</td>\n",
       "      <td>3.75</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.693102</td>\n",
       "      <td>0.602494</td>\n",
       "      <td>0.625</td>\n",
       "      <td>0.784264</td>\n",
       "      <td>0.701825</td>\n",
       "      <td>9</td>\n",
       "      <td>0.445472</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>181.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>492</th>\n",
       "      <td>0.438837</td>\n",
       "      <td>39.25</td>\n",
       "      <td>9.50</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.424035</td>\n",
       "      <td>0.418881</td>\n",
       "      <td>6.500</td>\n",
       "      <td>0.784264</td>\n",
       "      <td>0.701825</td>\n",
       "      <td>14</td>\n",
       "      <td>0.452459</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>240.0</td>\n",
       "      <td>4607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>350</th>\n",
       "      <td>0.472063</td>\n",
       "      <td>26.17</td>\n",
       "      <td>2.00</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.512222</td>\n",
       "      <td>0.267243</td>\n",
       "      <td>0.228547</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.070926</td>\n",
       "      <td>0.251477</td>\n",
       "      <td>0</td>\n",
       "      <td>0.445472</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>276.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           A1     A2     A3        A4        A5        A6        A7      A8  \\\n",
       "14   0.472063  45.83  10.50  0.512222  0.512222  0.622077  0.418881   5.000   \n",
       "586  0.438837  64.08  20.00  0.512222  0.512222  0.782414  0.602494  17.500   \n",
       "140  0.472063  31.25   3.75  0.512222  0.512222  0.693102  0.602494   0.625   \n",
       "492  0.438837  39.25   9.50  0.512222  0.512222  0.424035  0.418881   6.500   \n",
       "350  0.472063  26.17   2.00  0.512222  0.512222  0.267243  0.228547   0.000   \n",
       "\n",
       "           A9       A10  A11       A12       A13    A14   A15  \n",
       "14   0.784264  0.701825    7  0.445472  0.464817    0.0     0  \n",
       "586  0.784264  0.701825    9  0.445472  0.464817    0.0  1000  \n",
       "140  0.784264  0.701825    9  0.445472  0.464817  181.0     0  \n",
       "492  0.784264  0.701825   14  0.452459  0.464817  240.0  4607  \n",
       "350  0.070926  0.251477    0  0.445472  0.464817  276.0     1  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Let's inspect the transformed dataset\n",
    "\n",
    "X_test_enc.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Target mean encoding with Category Encoders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "from category_encoders.target_encoder import TargetEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the parameter min_samples_leaf corresponds to the minimum\n",
    "# number of observations per category group needed to\n",
    "# estimate the target mean only from the posterior probability\n",
    "\n",
    "# the posterior probability is the target mean as estimated in the\n",
    "# previous cells using pandas\n",
    "\n",
    "# for those categories with less than 25 observations, the target\n",
    "# estimate will be a \"blended\" probability\n",
    "\n",
    "mean_enc = TargetEncoder(cols=None, min_samples_leaf=25, smoothing=1.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-2 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-2 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-2 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-2 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-2 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-2 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-2 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-2 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-2 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>TargetEncoder(cols=[&#x27;A1&#x27;, &#x27;A4&#x27;, &#x27;A5&#x27;, &#x27;A6&#x27;, &#x27;A7&#x27;, &#x27;A9&#x27;, &#x27;A10&#x27;, &#x27;A12&#x27;, &#x27;A13&#x27;],\n",
       "              min_samples_leaf=25, smoothing=1.0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;TargetEncoder<span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>TargetEncoder(cols=[&#x27;A1&#x27;, &#x27;A4&#x27;, &#x27;A5&#x27;, &#x27;A6&#x27;, &#x27;A7&#x27;, &#x27;A9&#x27;, &#x27;A10&#x27;, &#x27;A12&#x27;, &#x27;A13&#x27;],\n",
       "              min_samples_leaf=25, smoothing=1.0)</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "TargetEncoder(cols=['A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A12', 'A13'],\n",
       "              min_samples_leaf=25, smoothing=1.0)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# with fit, the transformer learns the numerical\n",
    "# representation for each category\n",
    "\n",
    "mean_enc.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's transform train and test set\n",
    "\n",
    "X_train_enc = mean_enc.transform(X_train)\n",
    "X_test_enc = mean_enc.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "      <th>A3</th>\n",
       "      <th>A4</th>\n",
       "      <th>A5</th>\n",
       "      <th>A6</th>\n",
       "      <th>A7</th>\n",
       "      <th>A8</th>\n",
       "      <th>A9</th>\n",
       "      <th>A10</th>\n",
       "      <th>A11</th>\n",
       "      <th>A12</th>\n",
       "      <th>A13</th>\n",
       "      <th>A14</th>\n",
       "      <th>A15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>596</th>\n",
       "      <td>0.472222</td>\n",
       "      <td>46.08</td>\n",
       "      <td>3.000</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.451613</td>\n",
       "      <td>0.418773</td>\n",
       "      <td>2.375</td>\n",
       "      <td>0.785156</td>\n",
       "      <td>0.702830</td>\n",
       "      <td>8</td>\n",
       "      <td>0.445455</td>\n",
       "      <td>0.464853</td>\n",
       "      <td>396.0</td>\n",
       "      <td>4159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303</th>\n",
       "      <td>0.472222</td>\n",
       "      <td>15.92</td>\n",
       "      <td>2.875</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.625000</td>\n",
       "      <td>0.418773</td>\n",
       "      <td>0.085</td>\n",
       "      <td>0.070485</td>\n",
       "      <td>0.250923</td>\n",
       "      <td>0</td>\n",
       "      <td>0.452471</td>\n",
       "      <td>0.464853</td>\n",
       "      <td>120.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>0.438806</td>\n",
       "      <td>36.33</td>\n",
       "      <td>2.125</td>\n",
       "      <td>0.226087</td>\n",
       "      <td>0.226087</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.418773</td>\n",
       "      <td>0.085</td>\n",
       "      <td>0.785156</td>\n",
       "      <td>0.702830</td>\n",
       "      <td>1</td>\n",
       "      <td>0.452471</td>\n",
       "      <td>0.464853</td>\n",
       "      <td>50.0</td>\n",
       "      <td>1187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>351</th>\n",
       "      <td>0.438806</td>\n",
       "      <td>22.17</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.226087</td>\n",
       "      <td>0.226087</td>\n",
       "      <td>0.105264</td>\n",
       "      <td>0.146341</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.070485</td>\n",
       "      <td>0.250923</td>\n",
       "      <td>0</td>\n",
       "      <td>0.452471</td>\n",
       "      <td>0.464853</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118</th>\n",
       "      <td>0.438806</td>\n",
       "      <td>57.83</td>\n",
       "      <td>7.040</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.430123</td>\n",
       "      <td>0.418773</td>\n",
       "      <td>14.000</td>\n",
       "      <td>0.785156</td>\n",
       "      <td>0.702830</td>\n",
       "      <td>6</td>\n",
       "      <td>0.445455</td>\n",
       "      <td>0.464853</td>\n",
       "      <td>360.0</td>\n",
       "      <td>1332</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           A1     A2     A3        A4        A5        A6        A7      A8  \\\n",
       "596  0.472222  46.08  3.000  0.512397  0.512397  0.451613  0.418773   2.375   \n",
       "303  0.472222  15.92  2.875  0.512397  0.512397  0.625000  0.418773   0.085   \n",
       "204  0.438806  36.33  2.125  0.226087  0.226087  0.500000  0.418773   0.085   \n",
       "351  0.438806  22.17  0.585  0.226087  0.226087  0.105264  0.146341   0.000   \n",
       "118  0.438806  57.83  7.040  0.512397  0.512397  0.430123  0.418773  14.000   \n",
       "\n",
       "           A9       A10  A11       A12       A13    A14   A15  \n",
       "596  0.785156  0.702830    8  0.445455  0.464853  396.0  4159  \n",
       "303  0.070485  0.250923    0  0.452471  0.464853  120.0     0  \n",
       "204  0.785156  0.702830    1  0.452471  0.464853   50.0  1187  \n",
       "351  0.070485  0.250923    0  0.452471  0.464853  100.0     0  \n",
       "118  0.785156  0.702830    6  0.445455  0.464853  360.0  1332  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Let's inspect the transformed data\n",
    "\n",
    "X_train_enc.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "      <th>A3</th>\n",
       "      <th>A4</th>\n",
       "      <th>A5</th>\n",
       "      <th>A6</th>\n",
       "      <th>A7</th>\n",
       "      <th>A8</th>\n",
       "      <th>A9</th>\n",
       "      <th>A10</th>\n",
       "      <th>A11</th>\n",
       "      <th>A12</th>\n",
       "      <th>A13</th>\n",
       "      <th>A14</th>\n",
       "      <th>A15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>0.472222</td>\n",
       "      <td>45.83</td>\n",
       "      <td>10.50</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.625000</td>\n",
       "      <td>0.418773</td>\n",
       "      <td>5.000</td>\n",
       "      <td>0.785156</td>\n",
       "      <td>0.702830</td>\n",
       "      <td>7</td>\n",
       "      <td>0.445455</td>\n",
       "      <td>0.464853</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>586</th>\n",
       "      <td>0.438806</td>\n",
       "      <td>64.08</td>\n",
       "      <td>20.00</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.541359</td>\n",
       "      <td>0.603960</td>\n",
       "      <td>17.500</td>\n",
       "      <td>0.785156</td>\n",
       "      <td>0.702830</td>\n",
       "      <td>9</td>\n",
       "      <td>0.445455</td>\n",
       "      <td>0.464853</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140</th>\n",
       "      <td>0.472222</td>\n",
       "      <td>31.25</td>\n",
       "      <td>3.75</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.698322</td>\n",
       "      <td>0.603960</td>\n",
       "      <td>0.625</td>\n",
       "      <td>0.785156</td>\n",
       "      <td>0.702830</td>\n",
       "      <td>9</td>\n",
       "      <td>0.445455</td>\n",
       "      <td>0.464853</td>\n",
       "      <td>181.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>492</th>\n",
       "      <td>0.438806</td>\n",
       "      <td>39.25</td>\n",
       "      <td>9.50</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.430123</td>\n",
       "      <td>0.418773</td>\n",
       "      <td>6.500</td>\n",
       "      <td>0.785156</td>\n",
       "      <td>0.702830</td>\n",
       "      <td>14</td>\n",
       "      <td>0.452471</td>\n",
       "      <td>0.464853</td>\n",
       "      <td>240.0</td>\n",
       "      <td>4607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>350</th>\n",
       "      <td>0.472222</td>\n",
       "      <td>26.17</td>\n",
       "      <td>2.00</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.512397</td>\n",
       "      <td>0.449275</td>\n",
       "      <td>0.449275</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.070485</td>\n",
       "      <td>0.250923</td>\n",
       "      <td>0</td>\n",
       "      <td>0.445455</td>\n",
       "      <td>0.464853</td>\n",
       "      <td>276.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           A1     A2     A3        A4        A5        A6        A7      A8  \\\n",
       "14   0.472222  45.83  10.50  0.512397  0.512397  0.625000  0.418773   5.000   \n",
       "586  0.438806  64.08  20.00  0.512397  0.512397  0.541359  0.603960  17.500   \n",
       "140  0.472222  31.25   3.75  0.512397  0.512397  0.698322  0.603960   0.625   \n",
       "492  0.438806  39.25   9.50  0.512397  0.512397  0.430123  0.418773   6.500   \n",
       "350  0.472222  26.17   2.00  0.512397  0.512397  0.449275  0.449275   0.000   \n",
       "\n",
       "           A9       A10  A11       A12       A13    A14   A15  \n",
       "14   0.785156  0.702830    7  0.445455  0.464853    0.0     0  \n",
       "586  0.785156  0.702830    9  0.445455  0.464853    0.0  1000  \n",
       "140  0.785156  0.702830    9  0.445455  0.464853  181.0     0  \n",
       "492  0.785156  0.702830   14  0.452471  0.464853  240.0  4607  \n",
       "350  0.070485  0.250923    0  0.445455  0.464853  276.0     1  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Let's inspect the transformed data\n",
    "\n",
    "X_test_enc.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# M-estimate encoding with Category Encoders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "from category_encoders.m_estimate import MEstimateEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set up the encoder\n",
    "\n",
    "# m determines the weight given to the prior, that is\n",
    "# the target mean over the entire dataset\n",
    "\n",
    "# bigger m values mean more weight to the prior\n",
    "\n",
    "mean_enc = MEstimateEncoder(cols=None, m=1.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-3 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-3 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-3 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-3 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-3 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-3 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-3 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-3 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-3 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-3 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-3 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MEstimateEncoder(cols=[&#x27;A1&#x27;, &#x27;A4&#x27;, &#x27;A5&#x27;, &#x27;A6&#x27;, &#x27;A7&#x27;, &#x27;A9&#x27;, &#x27;A10&#x27;, &#x27;A12&#x27;, &#x27;A13&#x27;])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" checked><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;MEstimateEncoder<span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>MEstimateEncoder(cols=[&#x27;A1&#x27;, &#x27;A4&#x27;, &#x27;A5&#x27;, &#x27;A6&#x27;, &#x27;A7&#x27;, &#x27;A9&#x27;, &#x27;A10&#x27;, &#x27;A12&#x27;, &#x27;A13&#x27;])</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "MEstimateEncoder(cols=['A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A12', 'A13'])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# with fit, the transformer learns the numerical\n",
    "# representation for each category\n",
    "\n",
    "mean_enc.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's transform train and test set\n",
    "\n",
    "X_train_enc = mean_enc.transform(X_train)\n",
    "X_test_enc = mean_enc.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "      <th>A3</th>\n",
       "      <th>A4</th>\n",
       "      <th>A5</th>\n",
       "      <th>A6</th>\n",
       "      <th>A7</th>\n",
       "      <th>A8</th>\n",
       "      <th>A9</th>\n",
       "      <th>A10</th>\n",
       "      <th>A11</th>\n",
       "      <th>A12</th>\n",
       "      <th>A13</th>\n",
       "      <th>A14</th>\n",
       "      <th>A15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>596</th>\n",
       "      <td>0.472064</td>\n",
       "      <td>46.08</td>\n",
       "      <td>3.000</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.451588</td>\n",
       "      <td>0.418882</td>\n",
       "      <td>2.375</td>\n",
       "      <td>0.783849</td>\n",
       "      <td>0.701640</td>\n",
       "      <td>8</td>\n",
       "      <td>0.445472</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>396.0</td>\n",
       "      <td>4159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303</th>\n",
       "      <td>0.472064</td>\n",
       "      <td>15.92</td>\n",
       "      <td>2.875</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.621917</td>\n",
       "      <td>0.418882</td>\n",
       "      <td>0.085</td>\n",
       "      <td>0.072146</td>\n",
       "      <td>0.251652</td>\n",
       "      <td>0</td>\n",
       "      <td>0.452459</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>120.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>0.438837</td>\n",
       "      <td>36.33</td>\n",
       "      <td>2.125</td>\n",
       "      <td>0.228011</td>\n",
       "      <td>0.228011</td>\n",
       "      <td>0.498965</td>\n",
       "      <td>0.418882</td>\n",
       "      <td>0.085</td>\n",
       "      <td>0.783849</td>\n",
       "      <td>0.701640</td>\n",
       "      <td>1</td>\n",
       "      <td>0.452459</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>50.0</td>\n",
       "      <td>1187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>351</th>\n",
       "      <td>0.438837</td>\n",
       "      <td>22.17</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.228011</td>\n",
       "      <td>0.228011</td>\n",
       "      <td>0.114084</td>\n",
       "      <td>0.153554</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.072146</td>\n",
       "      <td>0.251652</td>\n",
       "      <td>0</td>\n",
       "      <td>0.452459</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118</th>\n",
       "      <td>0.438837</td>\n",
       "      <td>57.83</td>\n",
       "      <td>7.040</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.424047</td>\n",
       "      <td>0.418882</td>\n",
       "      <td>14.000</td>\n",
       "      <td>0.783849</td>\n",
       "      <td>0.701640</td>\n",
       "      <td>6</td>\n",
       "      <td>0.445472</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>360.0</td>\n",
       "      <td>1332</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           A1     A2     A3        A4        A5        A6        A7      A8  \\\n",
       "596  0.472064  46.08  3.000  0.512223  0.512223  0.451588  0.418882   2.375   \n",
       "303  0.472064  15.92  2.875  0.512223  0.512223  0.621917  0.418882   0.085   \n",
       "204  0.438837  36.33  2.125  0.228011  0.228011  0.498965  0.418882   0.085   \n",
       "351  0.438837  22.17  0.585  0.228011  0.228011  0.114084  0.153554   0.000   \n",
       "118  0.438837  57.83  7.040  0.512223  0.512223  0.424047  0.418882  14.000   \n",
       "\n",
       "           A9       A10  A11       A12       A13    A14   A15  \n",
       "596  0.783849  0.701640    8  0.445472  0.464817  396.0  4159  \n",
       "303  0.072146  0.251652    0  0.452459  0.464817  120.0     0  \n",
       "204  0.783849  0.701640    1  0.452459  0.464817   50.0  1187  \n",
       "351  0.072146  0.251652    0  0.452459  0.464817  100.0     0  \n",
       "118  0.783849  0.701640    6  0.445472  0.464817  360.0  1332  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Let's inspect the transformed data\n",
    "\n",
    "X_train_enc.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "      <th>A3</th>\n",
       "      <th>A4</th>\n",
       "      <th>A5</th>\n",
       "      <th>A6</th>\n",
       "      <th>A7</th>\n",
       "      <th>A8</th>\n",
       "      <th>A9</th>\n",
       "      <th>A10</th>\n",
       "      <th>A11</th>\n",
       "      <th>A12</th>\n",
       "      <th>A13</th>\n",
       "      <th>A14</th>\n",
       "      <th>A15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>0.472064</td>\n",
       "      <td>45.83</td>\n",
       "      <td>10.50</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.621917</td>\n",
       "      <td>0.418882</td>\n",
       "      <td>5.000</td>\n",
       "      <td>0.783849</td>\n",
       "      <td>0.701640</td>\n",
       "      <td>7</td>\n",
       "      <td>0.445472</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>586</th>\n",
       "      <td>0.438837</td>\n",
       "      <td>64.08</td>\n",
       "      <td>20.00</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.777971</td>\n",
       "      <td>0.602444</td>\n",
       "      <td>17.500</td>\n",
       "      <td>0.783849</td>\n",
       "      <td>0.701640</td>\n",
       "      <td>9</td>\n",
       "      <td>0.445472</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140</th>\n",
       "      <td>0.472064</td>\n",
       "      <td>31.25</td>\n",
       "      <td>3.75</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.691912</td>\n",
       "      <td>0.602444</td>\n",
       "      <td>0.625</td>\n",
       "      <td>0.783849</td>\n",
       "      <td>0.701640</td>\n",
       "      <td>9</td>\n",
       "      <td>0.445472</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>181.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>492</th>\n",
       "      <td>0.438837</td>\n",
       "      <td>39.25</td>\n",
       "      <td>9.50</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.424047</td>\n",
       "      <td>0.418882</td>\n",
       "      <td>6.500</td>\n",
       "      <td>0.783849</td>\n",
       "      <td>0.701640</td>\n",
       "      <td>14</td>\n",
       "      <td>0.452459</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>240.0</td>\n",
       "      <td>4607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>350</th>\n",
       "      <td>0.472064</td>\n",
       "      <td>26.17</td>\n",
       "      <td>2.00</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.512223</td>\n",
       "      <td>0.272142</td>\n",
       "      <td>0.241546</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.072146</td>\n",
       "      <td>0.251652</td>\n",
       "      <td>0</td>\n",
       "      <td>0.445472</td>\n",
       "      <td>0.464817</td>\n",
       "      <td>276.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           A1     A2     A3        A4        A5        A6        A7      A8  \\\n",
       "14   0.472064  45.83  10.50  0.512223  0.512223  0.621917  0.418882   5.000   \n",
       "586  0.438837  64.08  20.00  0.512223  0.512223  0.777971  0.602444  17.500   \n",
       "140  0.472064  31.25   3.75  0.512223  0.512223  0.691912  0.602444   0.625   \n",
       "492  0.438837  39.25   9.50  0.512223  0.512223  0.424047  0.418882   6.500   \n",
       "350  0.472064  26.17   2.00  0.512223  0.512223  0.272142  0.241546   0.000   \n",
       "\n",
       "           A9       A10  A11       A12       A13    A14   A15  \n",
       "14   0.783849  0.701640    7  0.445472  0.464817    0.0     0  \n",
       "586  0.783849  0.701640    9  0.445472  0.464817    0.0  1000  \n",
       "140  0.783849  0.701640    9  0.445472  0.464817  181.0     0  \n",
       "492  0.783849  0.701640   14  0.452459  0.464817  240.0  4607  \n",
       "350  0.072146  0.251652    0  0.445472  0.464817  276.0     1  "
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Let's inspect the transformed data\n",
    "\n",
    "X_test_enc.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fsml",
   "language": "python",
   "name": "fsml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "921.556px",
    "left": "0px",
    "right": "1827px",
    "top": "110.444px",
    "width": "165px"
   },
   "toc_section_display": "block",
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
